package com.shujia.spark

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

/**
 * @author yangjiming
 * @create 2021-05-10 13:21
 */
object Demo2WordCountSubmit {

  /**
   * Classic word-count job intended for `spark-submit`.
   *
   * Reads comma-separated words from `spark/data/words`, counts occurrences
   * per word, prints each "word,count" line, and writes the result to
   * `spark/data/count`.
   *
   * @param args unused command-line arguments
   */
  def main(args: Array[String]): Unit = {
    /**
     * Build the Spark runtime environment.
     */
    // Configuration object.
    val conf = new SparkConf()
    conf.setAppName("wordcount")
    // Only default to local mode when no master was supplied externally;
    // a hard-coded setMaster("local") would override `spark-submit --master`.
    conf.setIfMissing("spark.master", "local")
    // Construct the Spark context.
    val sc: SparkContext = new SparkContext(conf)

    try {
      // 1. Read the data.

      /**
       * RDD: resilient distributed dataset (usable much like a Scala collection).
       */
      val linesRDD: RDD[String] = sc.textFile("spark/data/words")
      //linesRDD.foreach(println)

      // 2. Split each line into individual words.
      val wordsRdd: RDD[String] = linesRDD.flatMap(line => line.split(","))
      //wordsRdd.foreach(println)

      // 3. Count per word with reduceByKey: unlike groupBy + size, this
      //    combines counts map-side before the shuffle, so only one partial
      //    count per word per partition crosses the network and no full
      //    iterable of occurrences is ever materialized per key.
      val countRDD: RDD[String] = wordsRdd
        .map(word => (word, 1))
        .reduceByKey(_ + _)
        .map(kv => kv._1 + "," + kv._2)

      // 4. Emit results: print to the driver log and persist as text files.
      //    NOTE(review): saveAsTextFile fails if the output path already
      //    exists — callers must clear `spark/data/count` between runs.
      countRDD.foreach(println)
      countRDD.saveAsTextFile("spark/data/count")
    } finally {
      // Release the context (executors, UI, shuffle files) even on failure.
      sc.stop()
    }
  }

}
